{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "import re\n",
    "\n",
    "class MyCrawler:\n",
    "    def __init__(self, filename):\n",
    "        self.filename = filename\n",
    "        self.headers =  {\n",
    "            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',\n",
    "        }\n",
    "    \n",
    "    def download(self, url):\n",
    "        r = requests.get(url, headers=self.headers)\n",
    "        return r.text\n",
    "    \n",
    "    def extract(self, content, pattern):\n",
    "        result = re.findall(pattern, content)\n",
    "        return result\n",
    "    \n",
    "    def save(self, info):\n",
    "        with open(self.filename, 'a', encoding='utf-8') as f:\n",
    "            for item in info:\n",
    "                f.write('|||'.join(item) + '\\n')\n",
    "    \n",
    "    def crawl(self, url, pattern, headers=None):\n",
    "        if headers:\n",
    "            self.headers.update(headers)\n",
    "        content = self.download(url)\n",
    "        info = self.extract(content, pattern)\n",
    "        self.save(info)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from lxml import html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Crawler instance shared by the cells below; its save() target is 'douban.txt'.\n",
    "douban_crawler = MyCrawler(filename='douban.txt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fetch the tag index page and collect the href of every <a> that sits directly inside a <td>.\n",
    "tag_index_url = 'https://book.douban.com/tag/?view=type'\n",
    "content = douban_crawler.download(tag_index_url)\n",
    "tree = html.fromstring(content)\n",
    "tag_url_matches = tree.xpath('//td/a/@href')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop the leading '/tag/' (5 characters) from each href, leaving only the tag name.\n",
    "tag_list = [href[len('/tag/'):] for href in tag_url_matches]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'小说'"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Spot-check the first extracted tag name.\n",
    "tag_list[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD\n"
     ]
    }
   ],
   "source": [
    "# Percent-encode a tag name so it can be embedded in a URL.\n",
    "import urllib.parse\n",
    "print(urllib.parse.quote('人工智能'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=0&type=T\n",
      "Last Start ID:  500\n",
      "为什么: 关于因果关系的新科学\n",
      "智能时代: 大数据与智能革命重新定义未来\n",
      "生命3.0: 人工智能时代，人类的进化与重生\n",
      "哥德尔、艾舍尔、巴赫: 集异璧之大成\n",
      "Python深度学习\n",
      "仿生人会梦见电子羊吗？\n",
      "奇点临近: 当计算机智能超越人类\n",
      "智能商业\n",
      "复杂\n",
      "深度学习\n",
      "动手学深度学习\n",
      "机器学习\n",
      "深度学习推荐系统\n",
      "自然语言处理入门\n",
      "AI·未来\n",
      "人工智能的未来\n",
      "认知: 人行为背后的思维与智能\n",
      "深度学习入门: 基于Python的理论与实现\n",
      "本源\n",
      "天才与算法: 人脑与AI的数学思维\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=20&type=T\n",
      "人工智能: 一种现代的方法(第3版)(影印版)\n",
      "深度学习: 智能时代的核心驱动力量\n",
      "人工智能之不能\n",
      "统计学习方法\n",
      "人工智能\n",
      "机器人叛乱: 在达尔文时代找到意义\n",
      "Python神经网络编程\n",
      "终极算法: 机器学习和人工智能如何重塑世界\n",
      "复杂\n",
      "创造性思维: 人工智能之父马文·明斯基论教育\n",
      "统计学习方法（第2版）\n",
      "Pattern Recognition and Machine Learning\n",
      "人工智能基础（高中版）: 高中版\n",
      "智能计算系统\n",
      "人工科学: 复杂性面面观\n",
      "暗知识：机器认知如何颠覆商业和社会: 机器认知如何颠覆商业和社会\n",
      "智慧的疆界: 从图灵机到人工智能\n",
      "神经网络与深度学习\n",
      "心智社会: 从细胞到人工智能，人类思维的优雅解读\n",
      "人工智能时代: 人机共生下财富、工作与思维的大未来\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=40&type=T\n",
      "百面机器学习: 算法工程师带你去面试\n",
      "人工智能哲学\n",
      "集体智慧编程\n",
      "人工智能简史\n",
      "第二次机器革命: 数字化技术将如何改变我们的经济与社会\n",
      "第二次机器革命: 数字化技术将如何改变我们的经济与社会\n",
      "人工智能: 一种现代方法(第2版)(中文版)\n",
      "人工智能全球格局: 未来趋势与中国位势\n",
      "皇帝新脑: 有关电脑、人脑及物理定律\n",
      "人生算法\n",
      "人类的终极命运: 从旧石器时代到人工智能的未来\n",
      "数字思维\n",
      "无人驾驶: 人工智能将从颠覆驾驶开始，全面重构人类生活\n",
      "人类的认知: 思维的信息加工理论\n",
      "GEB——一条永恒的金带\n",
      "人工智能: 一种现代的方法\n",
      "深度学习：基于案例理解深度神经网络\n",
      "算法霸权: 数学杀伤性武器的威胁\n",
      "智能的本质 人工智能与机器人领域的64个大问题: 人工智能与机器人领域的64个大问题\n",
      "人工智能 （第2版）\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=60&type=T\n",
      "深入浅出图神经网络：GNN原理解析\n",
      "Bayesian Reasoning and Machine Learning\n",
      "The Book of Why: The New Science of Cause and Effect\n",
      "必然\n",
      "直觉泵和其他思考工具\n",
      "软件体的生命周期: 特德·姜科幻小说集\n",
      "智能时代: 当所有的机器都能学习思考，我们的生活会如何改变\n",
      "推荐系统实践\n",
      "AI极简经济学\n",
      "Learning From Data: A Short Course\n",
      "情感机器: 人类思维与人工智能的未来\n",
      "云球（第一部）\n",
      "科学的极致：漫谈人工智能\n",
      "量子计算机简史\n",
      "语音与语言处理: ：自然语言处理、计算语言学和语音识别导论\n",
      "艾伦·图灵传: 如谜的解谜者\n",
      "最有人性的“人”: 人工智能带给我们的启示\n",
      "被看见的力量: 快手是什么\n",
      "心智、语言和机器: 维特根斯坦哲学和人工智能科学的对话\n",
      "控制论: 或关于在动物和机器中控制和通信的科学\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=80&type=T\n",
      "Python深度学习：基于PyTorch\n",
      "如何创造思维: 人类思想所揭示出的奥秘\n",
      "错觉: AI如何通过数据挖掘误导我们\n",
      "人工智能产品经理——AI时代PM修炼手册\n",
      "机器崛起: 遗失的控制论历史\n",
      "Chatbot从0到1: 对话式交互设计实践指南\n",
      "未来地图: 技术、商业和我们的选择\n",
      "凸优化\n",
      "超级智能: 路线图、危险性与应对策略\n",
      "不会被机器替代的人: 智能时代的生存策略\n",
      "机器学习实战\n",
      "人生新算法: 用人工智能解读时间、幸运与财富\n",
      "图灵的秘密: 他的生平、思想及论文解读\n",
      "Reinforcement Learning: An Introduction (second edition)\n",
      "Hands-On Machine Learning with Scikit-Learn and TensorFlow: Concepts, Tools, and Techniques for Building Intelligent Systems\n",
      "脑机穿越: 脑机接口改变人类未来\n",
      "深入理解AutoML和AutoDL：构建自动化机器学习与深度学习平台\n",
      "智能战略\n",
      "计算机与人脑\n",
      "心灵的未来\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=100&type=T\n",
      "Artificial Intelligence: A Modern Approach\n",
      "知识图谱：概念与技术\n",
      "深度思考: 人工智能的终点与人类创造力的起点\n",
      "灵魂机器的时代: 当计算机超过人类智能时/开放人文\n",
      "人工智能的未来\n",
      "Tensorflow：实战Google深度学习框架\n",
      "智能浪潮: 增强时代来临\n",
      "KK三部曲: 失控＋科技想要什么＋必然\n",
      "智能革命: 迎接人工智能时代的社会、经济与文化变革\n",
      "机器学习\n",
      "Superintelligence: Paths, Dangers, Strategies\n",
      "人机平台：商业未来行动路线图\n",
      "人人都该懂的人工智能\n",
      "The Elements of Statistical Learning: Data Mining, Inference, and Prediction, Second Edition\n",
      "Learning OpenCV 3: Computer Vision in C++ with the OpenCV Library\n",
      "携程人工智能实践\n",
      "产品经理进阶：100个案例搞懂人工智能\n",
      "神经网络在应用科学和工程中的应用: 从基本原理到复杂的模式识别\n",
      "Deep Learning: Adaptive Computation and Machine Learning series\n",
      "黑镜: 科幻与悬疑的绝佳组合之书\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=120&type=T\n",
      "与机器人共舞\n",
      "Python机器学习基础教程\n",
      "Probabilistic Graphical Models: Principles and Techniques\n",
      "机器学习实战：基于Scikit-Learn和TensorFlow\n",
      "第四次革命\n",
      "计算机视觉: 一种现代方法 第二版\n",
      "神经网络设计\n",
      "Foundations of Machine Learning\n",
      "Information Theory, Inference and Learning Algorithms\n",
      "图解机器学习\n",
      "被人工智能操控的金融业: 人工知能が金融を支配する日\n",
      "强化学习（第2版）\n",
      "Godel, Escher, Bach: An Eternal Golden Braid\n",
      "统计自然语言处理基础\n",
      "基于深度学习的自然语言处理\n",
      "深度学习导论\n",
      "机器情人: 当情感被算法操控\n",
      "神经网络设计（原书第2版）\n",
      "数据挖掘导论: Introduction to Data Mining\n",
      "智能机器如何思考: 深度神经网络的秘密\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=140&type=T\n",
      "机器视觉\n",
      "统计学习理论\n",
      "人工智能导论: Introduction to Artificial Intelligence\n",
      "Deep Learning with Python\n",
      "Artificial Intelligence for Games, Second Edition: Intelligence for Games\n",
      "南京大学人工智能本科专业教育培养体系: 培养体系\n",
      "信息论、推理与学习算法\n",
      "TensorFlow：实战Google深度学习框架（第2版）\n",
      "计算机不能做什么: 人工智能的极限\n",
      "你一定爱读的人工智能简史\n",
      "人工智能产品经理：人机对话系统设计逻辑探究\n",
      "推荐系统\n",
      "硬战：人工智能时代的爆款产品\n",
      "人工智能哲学\n",
      "大脑的未来: 神经科学的愿景与隐忧\n",
      "我们最后的发明: 人工智能与人类时代的终结\n",
      "情感分析：挖掘观点、情感和情绪: 挖掘观点、情感和情绪\n",
      "实用多元统计分析\n",
      "The Singularity Is Near: When Humans Transcend Biology\n",
      "决战大数据（升级版）: 大数据的关键思考\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=160&type=T\n",
      "分布式机器学习：算法、理论与实践\n",
      "给孩子的人工智能图解: 明天开始就想用上的68个关键词\n",
      "知识图谱：方法、实践与应用\n",
      "“AI失业”时代生存指南: 未来5年在职场会发生什么\n",
      "AI赋能：AI重新定义产品经理\n",
      "艾比斯之梦\n",
      "合作的复杂性: 基于参与者的竞争与合作模型\n",
      "微粒社会\n",
      "人工智能: 复杂问题求解的结构和策略\n",
      "深度学习与图像识别：原理与实践: 学习图像识别，这本书轻松带你从0到100！阿里巴巴达摩院算法专家领衔\n",
      "对冲之王（经典版）: 华尔街量化投资传奇\n",
      "Data-Driven Science and Engineering: Machine Learning, Dynam\n",
      "Introduction to Information Retrieval\n",
      "机器翻译\n",
      "机器人法\n",
      "算法交易员：会赚钱的人工智能\n",
      "金羊毛: 世界科幻大师丛书\n",
      "万物都相爱\n",
      "神经网络与机器学习（原书第3版）\n",
      "游戏人工智能\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=180&type=T\n",
      "剑桥五重奏——机器能思考吗？: 机器能思考吗？\n",
      "皇帝新脑\n",
      "Introduction to Linear Algebra: Fifth Edition\n",
      "语言与心智\n",
      "强化学习：原理与Python实现\n",
      "机·智: 从数字化车间走向智能制造\n",
      "What Computers Still Can't Do: A Critique of Artificial Reason\n",
      "2小时读懂物联网\n",
      "人工智能: 人工智能·智能系统指南（原书第2版）\n",
      "Machine Learning: A Probabilistic Perspective\n",
      "The Sciences of the Artificial\n",
      "人工智能十万个为什么：热AI\n",
      "人工智能与法律的对话\n",
      "如何创造可信的AI\n",
      "人工智能: 一种现代的方法(第2版)(影印版)\n",
      "智能语音时代：商业竞争、技术创新与虚拟永生: 麻省理工科技评论2019全球十大突破性技术，解密苹果、谷歌、Facebook、微\n",
      "虚拟人\n",
      "机器学习：算法背后的理论与优化（中外学者论AI）\n",
      "计算机与人脑\n",
      "意识的解释\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=200&type=T\n",
      "文本数据管理与分析：信息检索与文本挖掘的实用导论\n",
      "无心的机器\n",
      "Programming Game AI by Example\n",
      "Human Compatible: Artificial Intelligence and the Problem of Control\n",
      "数字中国: 区块链、智能革命与国家治理的未来\n",
      "人工智能及其应用: 第4版\n",
      "和机器人一起进化: Generation Robot\n",
      "人机共生：谁是不会被机器替代的人（托马斯·达文波特智能商业五部曲）\n",
      "统计自然语言处理（第2版）\n",
      "Foundations of Statistical Natural Language Processing\n",
      "机器学习与优化\n",
      "剑桥五重奏: 机器能思考吗\n",
      "爱犯错的智能体\n",
      "人工智能时代的教育革命\n",
      "Causality: Models, Reasoning and Inference\n",
      "无所遁形\n",
      "数据科学家访谈录: 25位著名数据科学家的真知灼见\n",
      "What to Think About Machines That Think: Today's Leading Thinkers on the Age of Machine Intelligence\n",
      "无人军队: 自主武器与未来战争\n",
      "算法的陷阱: 超级平台、算法垄断与场景欺骗\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=220&type=T\n",
      "Neural Networks and Deep Learning\n",
      "人工智能会抢哪些工作\n",
      "深度学习的数学\n",
      "进击的科技: 从爱因斯坦到人工智能\n",
      "人工智能简史\n",
      "技术奇点\n",
      "有限理性适应性工具箱: 适应性工具箱\n",
      "图灵的大教堂: 数字宇宙开启智能时代\n",
      "Advances in Financial Machine Learning\n",
      "心智: 认知科学导论\n",
      "Gödel, Escher, Bach: An Eternal Golden Braid\n",
      "Neural Networks and Deep Learning\n",
      "Army of None: Autonomous Weapons and the Future of War\n",
      "工具，还是武器？: 直面人类科技最紧迫的争议性问题\n",
      "模式识别\n",
      "游戏人工智能编程案例精粹\n",
      "Make Your Own Neural Network\n",
      "Artificial Intelligence for Everyone\n",
      "用户体验设计指南：从方法论到产品设计实践\n",
      "白话大数据与机器学习\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=240&type=T\n",
      "Python Machine Learning: Machine Learning and Deep Learning with Python, scikit-learn, and TensorFlow, 2nd Edition\n",
      "模式识别: 第四版\n",
      "风向: 如何应对互联网变革下的知识焦虑、不确定与个人成长\n",
      "人类活动中的理性\n",
      "创世纪\n",
      "The Creativity Code\n",
      "TensorFlow机器学习项目实战\n",
      "自动机器学习入门与实践: 使用Python\n",
      "情感与学习技术的新视角（21世纪人类学习的革命）\n",
      "计算机视觉: 模型、学习和推理\n",
      "机器生命的秘密\n",
      "人工智能\n",
      "TensorFlow实战\n",
      "Vision: A Computational Investigation into the Human Representation and Processing of Visual Information\n",
      "Python编程第4版\n",
      "未来简史\n",
      "文本数据挖掘\n",
      "人工智能: 复杂问题求解的结构和策略(原书第6版)\n",
      "Prediction Machines: The Simple Economics of Artificial Intelligence\n",
      "人工智能超越人类：技术奇点的冲击\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=260&type=T\n",
      "认知科学哲学问题研究\n",
      "智能问答与深度学习\n",
      "Learning Deep Architectures for AI\n",
      "人工智能: 开启颠覆性智能时代\n",
      "产品改变世界: Siri如何成功创造千亿市场\n",
      "Python Machine Learning Cookbook\n",
      "数字创世纪: 人工生命的新科学\n",
      "区块链与人工智能：数字经济新时代: 畅销书《区块链与新经济：数字货币2.0时代》全新修订升级版。《互联网\n",
      "面向机器智能的TensorFlow实践\n",
      "数学之美\n",
      "A New Kind of Science\n",
      "计算机视觉: 算法与应用\n",
      "智能问答\n",
      "集体智慧编程\n",
      "Neural Network Methods in Natural Language Processing\n",
      "神经网络原理(原书第2版)\n",
      "人工科学\n",
      "Handbook of Collective Intelligence\n",
      "Artificial Intelligence: A Modern Approach , 4th Edition\n",
      "隐藏的行为: 塑造未来的7种无形力量\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=280&type=T\n",
      "预见人力资源——新时代HR的进化方法论\n",
      "自然语言处理综论（第二版）\n",
      "The Modularity of Mind: An Essay on Faculty Psychology\n",
      "教育的未来：人工智能时代的教育变革\n",
      "区块链+人工智能 下一个改变世界的经济新模式: 下一个改变世界的经济新模式\n",
      "白话深度学习与TensorFlow\n",
      "图像局部不变性特征与描述\n",
      "机器之心\n",
      "未来医疗: 智能时代的个体医疗革命\n",
      "人工智能导论\n",
      "“深蓝”揭秘: 追寻人工智能圣杯之旅\n",
      "新机器的灵魂\n",
      "知识图谱\n",
      "逻辑人生: 哥德尔传\n",
      "You Look Like a Thing and I Love You: How Artificial Intelligence Works and Why It's Making the World a Weirder Place\n",
      "机器学习: 贝叶斯和优化方法\n",
      "解密搜索引擎技术实战\n",
      "我是阿爾法: 論法和人工智能\n",
      "Artificial Intelligence for Games (The Morgan Kaufmann Series in Interactive 3D Technology)\n",
      "Introduction to Automata Theory,Languages, and Computation\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=300&type=T\n",
      "I Am a Strange Loop\n",
      "仿生人会梦见电子羊吗？\n",
      "Automation and Utopia: Human Flourishing in a World without Work\n",
      "无人机网络与通信\n",
      "贝叶斯网引论\n",
      "贤二机器僧漫游人工智能\n",
      "Python数据分析与挖掘实战\n",
      "特征提取与图像处理\n",
      "狡猾的情感: 为何愤怒、嫉妒、偏见让我们的决策更理性\n",
      "Artificial Intelligence: A Very Short Introducion\n",
      "Artificial Intelligence: Structures and Strategies for Complex Problem Solving (6th Edition)\n",
      "人工智能狂潮: 机器人会超越人类吗？\n",
      "Computability and Logic\n",
      "人工智能导论: 人工智能导论\n",
      "刷脸背后: 人脸检测 人脸识别 人脸检索\n",
      "可穿戴创意设计：技术与时尚的融合\n",
      "Artificial Intelligence in the Age of Neural Networks and Brain Computing\n",
      "今日简史: 人类命运大议题\n",
      "AI改变设计——人工智能时代的设计师生存手册\n",
      "贪婪的大脑: 为何人类会无止境地寻求意义\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=320&type=T\n",
      "智能客服机器人\n",
      "Understanding Machine Learning: From Theory to Algorithms\n",
      "人工智能: 国家人工智能战略行动抓手\n",
      "AI思维: 从数据中创造价值的炼金术\n",
      "Google如何统治世界:人工智能会是人类的敌人吗?\n",
      "情感解剖图鉴\n",
      "科学+预见人工智能\n",
      "机器学习系统设计\n",
      "控制论: 或关于在动物和机器中控制和通信的科学\n",
      "新机器的灵魂\n",
      "经济奇点: 人工智能时代,我们将如何谋生?\n",
      "我眼中的Master\n",
      "让生活更美好: 无线电科普丛书\n",
      "MXNet深度学习实战\n",
      "罐装神仙-壹\n",
      "数理情感学: 人类情感的数学逻辑\n",
      "Architects of Intelligence: The truth about AI from the people building it\n",
      "人工智能原理与方法\n",
      "第一本无人驾驶技术书\n",
      "白话机器学习算法\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=340&type=T\n",
      "感情研究指南: 情感史的框架\n",
      "机器文明数学本质\n",
      "Machine Learning: An Algorithmic Perspective\n",
      "数据挖掘中的新方法：支持向量机: 支持向量机\n",
      "Mind as Machine: A History of Cognitive Science\n",
      "孤高求败: 阿尔法GO60局精彩绝招详解\n",
      "人机共生: 当爱情、战争和生活都自动化了,人类该如何自处\n",
      "走近2050：注意力、互联网与人工智能\n",
      "Theory of Self-Reproducing Automata\n",
      "从无限运算力到无限想象力：设计人工智能概览\n",
      "深度学习核心技术与实践\n",
      "机器世界\n",
      "人工智能关我什么事: 全面了解人工智能如何改变日常生活\n",
      "喝掉这“罐”书\n",
      "游戏编程中的人工智能技术\n",
      "Surfaces and Essences: Analogy as the Fuel and Fire of Thinking\n",
      "Artificial Intelligence: Foundations of Computational Agents\n",
      "超人诞生: 人类增强的新技术\n",
      "没有思想的世界: 科技巨头对独立思考的威胁\n",
      "ROS机器人程序设计: （原书第二版）\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=360&type=T\n",
      "人工智能新时代：全球人工智能应用真实落地50例\n",
      "起点人\n",
      "大腦解密手冊: 誰在做決策、現實是什麼、為何沒有人是孤島、科技將如何改變大腦的未\n",
      "游戏开发中的人工智能\n",
      "机器学习导论（原书第3版）\n",
      "概率图模型：原理与技术\n",
      "Python自然语言处理实战: 核心技术与算法\n",
      "推荐系统开发实战\n",
      "机器危机\n",
      "Python自然语言处理\n",
      "神经网络控制\n",
      "玩家\n",
      "漫画机器学习入门\n",
      "科技之巅: 《麻省理工科技评论》50大全球突破性技术深度剖析\n",
      "超级技术: 改变未来社会和商业的技术趋势\n",
      "Computer Vision: Models, Learning, and Inference\n",
      "内向者沟通圣经\n",
      "机器学习: 实用案例解析\n",
      "情感计算\n",
      "白话强化学习与PyTorch\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=380&type=T\n",
      "营销三大算法: 引领营销进入算法时代\n",
      "《裂变：秒懂人工智能的基础课》\n",
      "AI+医疗健康: 智能化医疗健康的应用与未来\n",
      "Python程序员面试笔试宝典\n",
      "统计学关我什么事: 生活中的极简统计学\n",
      "灵魂机器的时代：当计算机超过人类智能时\n",
      "计算机视觉: 一种现代方法\n",
      "机器与人：埃森哲论新人工智能: 埃森哲论新人工智能\n",
      "游戏人工智能编程案例精粹\n",
      "解析几何 (第三版)\n",
      "精通数据科学：从线性回归到深度学习\n",
      "模式分类: 原书第2版\n",
      "人脸识别原理及算法: 动态人脸识别系统研究\n",
      "The Algebraic Mind: Integrating Connectionism and Cognitive Science (Learning, Development, and Conceptual Change)\n",
      "统计机器学习导论\n",
      "赛先生的梦魇: 新技术革命二十讲\n",
      "TensorFlow技术解析与实战\n",
      "企业人工智能战略\n",
      "逻辑的引擎\n",
      "The Age of Spiritual Machines: When Computers Exceed Human Intelligence\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=400&type=T\n",
      "Python机器学习经典实例\n",
      "Python数据科学与机器学习\n",
      "精通Visual C++指纹模式识别系统算法及实现\n",
      "迷人的技术\n",
      "Race Against the Machine: How the Digital Revolution is Accelerating Innovation, Driving Productivity, and Irreversibly Tr\n",
      "神经网络与深度学习\n",
      "On Intelligence: How a New Understanding of the Brain will Lead to the Creation of Truly Intelligent Machines\n",
      "身体的智能: 智能科学新视角\n",
      "大数据智能: 互联网时代的机器学习和自然语言处理技术\n",
      "The New Division of Labor: How Computers Are Creating the Next Job Market\n",
      "解析深度学习：语音识别实践\n",
      "一本书读懂人工智能\n",
      "如何求解问题: 现代启发式方法\n",
      "第四次教育革命: 人工智能如何改变教育\n",
      "The AI Delusion\n",
      "The Mind's I: Fantasies And Reflections On Self & Soul\n",
      "智能摄影测量学导论\n",
      "让法律人读懂人工智能\n",
      "Neural Networks and Statistical Learning\n",
      "Neural Networks and Learning Machines: Third Edition\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=420&type=T\n",
      "内容算法: 把内容变成价值的效率系统\n",
      "复杂的引擎\n",
      "人工智能导论\n",
      "超级思维: 人类和计算机一起思考的惊人力量\n",
      "Talking Nets\n",
      "计算机程序的构造和解释: 原书第2版\n",
      "Memory and the Computational Brain: Why Cognitive Science will Transform Neuroscience\n",
      "Matrix Computations\n",
      "The Philosophy of Artificial Intelligence\n",
      "人工智能会取代人类吗？: 智能时代的人类未来\n",
      "机器学习在线：解析阿里云机器学习平台\n",
      "人有人的用处: 控制论与社会\n",
      "必然\n",
      "通信与移动系统\n",
      "新版机器人技术手册\n",
      "推荐系统: 技术、评估及高效算法\n",
      "人工智能的冲击: 失去工作，还是不用工作？\n",
      "计算机和人脑\n",
      "香农传: 从0到1开创信息时代\n",
      "大数据架构商业之路: 从业务需求到技术方案\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=440&type=T\n",
      "深入理解XGBoost：高效机器学习算法与进阶\n",
      "Deep Medicine: How Artificial Intelligence Can Make Healthcare Human Again\n",
      "深度强化学习: 原理与实践\n",
      "克隆版大脑\n",
      "聊天机器人：对话式体验产品设计\n",
      "Neural-Symbolic Cognitive Reasoning\n",
      "统计之美: 人工智能时代的科学思维\n",
      "超人类革命: 生物科技将如何改变我们的未来？\n",
      "机器翻译简明教程: 翻译专业本科生系列教材\n",
      "Probably Approximately Correct: Nature’s Algorithms for Learning and Prospering in a Complex World\n",
      "从掷骰子到阿尔法狗：趣谈概率\n",
      "智能驾驶技术：路径规划与导航控制\n",
      "计算智能导论\n",
      "Python机器学习（原书第2版)\n",
      "三体智能革命\n",
      "计算机科学中的数学: 信息与智能时代的必修课\n",
      "中国城市大洗牌\n",
      "人工智能革命: 历史、当下与未来\n",
      "AI的25种可能\n",
      "玩具\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=460&type=T\n",
      "数据挖掘导论\n",
      "深度学习与计算机视觉: 算法原理、框架应用与代码实现\n",
      "Reinforcement Learning: An Introduction\n",
      "Life 3.0: Being Human in the Age of Artificial Intelligence\n",
      "Machine Learning in Action\n",
      "I Am a Strange Loop\n",
      "反常识\n",
      "The Future of the Mind: The Scientific Quest to Understand, Enhance, and Empower the Mind\n",
      "2030年の世界地図帳: あたらしい経済とSDGs、未来への展望\n",
      "2030·终点镇\n",
      "心我论: 对自我和灵魂的奇思冥想\n",
      "5G+AI智能商业：商业变革和产业机遇\n",
      "人工智能学院本硕博培养体系\n",
      "Artificial Intelligence (3rd Edition)\n",
      "AI世代生存哲學大思考: 人人都必須了解的「新AI學」\n",
      "The Zero Marginal Cost Society: The Internet of Things, the Collaborative Commons, and the Eclipse of Capitalism\n",
      "Hello World: How to be Human in the Age of the Machine\n",
      "Learning with Kernels: Support Vector Machines, Regularization, Optimization, and Beyond (Adaptive Computation and Mach\n",
      "The Creativity Code: Art and Innovation in the Age of AI\n",
      "Abstraction in Artificial Intelligence and Complex Systems\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=480&type=T\n",
      "Python深度学习: 用Python快速学习深度神经网络\n",
      "人类帝国的覆灭: 一个机器人的回忆录\n",
      "Statistical Rethinking: A Bayesian Course with Examples in R and Stan\n",
      "用户的本质: 数字化时代的精准运营法则\n",
      "人工智能导论\n",
      "未来地图: 创造人工智能万亿级产业的商业模式和路径\n",
      "Keras快速上手：基于Python的深度学习实战\n",
      "Feature Selection for High-Dimensional Data (Artificial Intelligence: Foundations, Theory, and Algorithms)\n",
      "万物重构：智能社会来临前夜的思索\n",
      "未来之路: 科技、商业和人类的选择\n",
      "大脑、机器和数学\n",
      "人工智能的进化: 计算机思维离人类心智还有多远\n",
      "Programming Collective Intelligence: Building Smart Web 2.0 Applications\n",
      "Handbook of Research on Synthesizing Human Emotion in Intelligent Systems and Robotics(智能系统与机器人技术的合成人类情感研究手册（丛书）)\n",
      "微表情心理学: 读心识人准到骨子\n",
      "海伯利安\n",
      "Introduction to Bayesian Scientific Computing: Ten Lectures on Subjective Computing (Surveys and Tutorials in the Applied Mathematical Sciences\n",
      "机器人战争: 21世纪机器人技术革命与反思\n",
      "认知神经科学: 关于心智的生物学\n",
      "算法小时代: 从数学到生活的历变\n",
      "https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start=500&type=T\n",
      "Computer Vision: Algorithms and Applications\n",
      "Darwin among the Machines: The Evolution of Global Intelligence\n",
      "The Most Human Human: What Talking with Computers Teaches Us About What It Means to Be Alive\n",
      "失控: 全人类的最终命运和结局\n",
      "人类简史: 从动物到上帝\n"
     ]
    }
   ],
   "source": [
    "import re\n",
    "import time\n",
    "\n",
    "# Walk every result page of the Douban book tag (its name is URL-encoded in the address) and print each title.\n",
    "page_id = 1\n",
    "last_start = 0  # `start` offset of the final page; stays 0 if no paginator link is found\n",
    "while True:\n",
    "    start_id = 20 * (page_id - 1)  # the `start` query value grows by 20 per page\n",
    "    url = 'https://book.douban.com/tag/%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD?start={}&type=T'.format(start_id)\n",
    "    print(url)\n",
    "    content = douban_crawler.download(url)\n",
    "    tree = html.fromstring(content)\n",
    "    if page_id == 1:\n",
    "        # Read the final page's offset from the last direct <a> child of the paginator.\n",
    "        page_links = tree.xpath(\"//div[@class='paginator']/a[last()]/@href\")\n",
    "        if page_links:\n",
    "            start_matches = re.findall(r'start=(\\d+)', page_links[0])\n",
    "            if start_matches:\n",
    "                last_start = int(start_matches[0])\n",
    "                print('Last Start ID: ', last_start)\n",
    "    book_infos = tree.xpath(\"//li[@class='subject-item']\")\n",
    "    for book_info in book_infos:\n",
    "        title_links = book_info.xpath('.//h2/a')\n",
    "        if not title_links:\n",
    "            continue  # no title link in this item, so there is nothing to report\n",
    "        book_name_elem = title_links[0]\n",
    "        # Remove newlines, then delete runs of two or more whitespace characters.\n",
    "        book_name = re.sub(r'\\s{2,}', '', book_name_elem.text_content().replace('\\n', ''))\n",
    "        book_url = book_name_elem.attrib['href']\n",
    "        # An element's `.text` can be None and the pub div may be absent; fall back instead of raising.\n",
    "        book_pub_elem = book_info.xpath(\".//div[@class='pub']\")\n",
    "        book_pub_info = (book_pub_elem[0].text or '').strip() if book_pub_elem else 'N/A'\n",
    "        book_intro = 'N/A'\n",
    "        book_intro_elem = book_info.xpath(\".//div[@class='info']/p\")\n",
    "        if book_intro_elem and book_intro_elem[0].text:\n",
    "            book_intro = book_intro_elem[0].text.strip()\n",
    "        print(book_name)\n",
    "    page_id += 1\n",
    "    # `>=` rather than `==` so the loop still ends if the offset ever steps past last_start.\n",
    "    if start_id >= last_start:\n",
    "        break\n",
    "    time.sleep(1)  # pause between page requests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Python深度学习: 用Python快速学习深度神经网络 '"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Try the title clean-up regex: runs of 2+ whitespace characters are removed, a lone space is kept.\n",
    "# The pattern is a raw string so that \\s reaches the regex engine without an invalid-escape warning.\n",
    "re.sub(r'\\s{2,}', '', 'Python深度学习           : 用Python快速学习深度神经网络 ')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample pagination href used by the cells below to try out ways of pulling out the `start` value.\n",
    "s = '/tag/神经网络888?start=20&type=T'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'20'"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pull the digits that follow 'start=' out of the sample href (raw string keeps \\d a regex escape).\n",
    "re.findall(r'start=(\\d+)', s)[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'20'"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same extraction by slicing: skip past 'start=' and cut off as many trailing characters as '&type=T' has.\n",
    "s[s.index('start=') + len('start='):-len('&type=T')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Index at which 'start=' begins in the sample href.\n",
    "s.index('start=')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "18"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Index at which '&type' begins in the sample href.\n",
    "s.index('&type')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
